
// Table 1: summary statistics

	use "shb_sampledata.dta", clear

	* Salary distribution: full sample, then female == 0 and female == 1
	summarize employee_salary, detail
	forvalues g = 0/1 {
		summarize employee_salary if female == `g', detail
	}

	tabulate employee_earliest_year_flag
	tabulate female

	* One row per employee-year: rows are ordered by descending
	* employee_salary_annual within employee_id and year before the
	* duplicate employee-year rows are dropped
	gsort employee_id year -employee_salary_annual
	duplicates drop employee_id year, force
	destring employee_id, replace
	* NOTE(review): max_state_year is created here but not referenced again in
	* this section; the conditions below use min_state_year and min_emp_year,
	* which are not created in this section - confirm they exist in the data
	bysort state: egen max_state_year = max(year)
	tsset employee_id year

	* new_hire is defined only for years after min_state_year: 1 when the
	* employee has no observation in the previous year, 0 otherwise
	generate new_hire = (L.employee_id == .) if year > min_state_year

	* stay is 1 when the employee is observed in the previous year; it is set
	* to 0 for the remaining rows except those in min_state_year or min_emp_year
	generate stay = 1 if L.employee_id != .
	replace stay = 0 if stay == . & year != min_state_year & year != min_emp_year
	generate departure = 1 - stay

	summarize female, detail
	summarize departure, detail
	forvalues g = 0/1 {
		summarize departure if female == `g', detail
	}

* New hires only
	keep if employee_earliest_year_flag == 1
	forvalues g = 0/1 {
		summarize employee_salary if female == `g', detail
	}

// Table 2: log salary on treatment and female, estimated separately for
// new hires (flag == 1) and continuing employees (flag == 0)

	local se_opts cluster(state) keepsingletons

	* The flag runs 1 then 0 so the new-hire sample is estimated first
	forvalues flag = 1(-1)0 {
		use "shb_sampledata.dta", clear
		keep if employee_earliest_year_flag == `flag' & year > min_state_year

		* Three fixed-effect sets, from coarsest to richest
		foreach fe in "state year soc" "statesoc socyr" "statesoc socyr statefemale femaleyr socfemale" {
			reghdfe log_salary_annual treatment female, absorb(`fe') `se_opts'
		}
	}

	
//table 3
// Builds the two analysis files used by the heterogeneity regressions:
// shb_newhire_heterogeneity.dta and shb_continuing_heterogeneity.dta.
	use "shb_sampledata.dta", clear

* New hires only
	keep if employee_earliest_year_flag == 1 & year > min_state_year
	
* Position-level statistics: each one is computed within soc from the
* 2014-2015 rows only and then copied to every row of that soc via max().

* 1: Average Salary by Position
	tempvar temp
	bysort soc (state): egen `temp' = mean(employee_salary_annual) if inrange(year, 2014, 2015)
	by soc: egen mean_salary_by_position = max(`temp')
	drop `temp'

* 2: Salary Variance by Position (standard deviation, then squared)
	tempvar temp
	by soc: egen `temp' = sd(employee_salary_annual) if inrange(year, 2014, 2015)
	by soc: egen var_salary_by_position  = max(`temp')
	replace var_salary_by_position = var_salary_by_position^2
	drop `temp'
	
* 3: Fraction of Women by Position
	tempvar temp
	by soc: egen `temp' = mean(female) if inrange(year, 2014, 2015) 
	by soc: egen perc_women_by_position = max(`temp')
	drop `temp'

* 4: Previous gender wage gap 
* Mean employee_salary by soc and female over 2014-2015, reshaped to one row
* per soc; wage_gap is the female==0 mean minus the female==1 mean.
	preserve
	keep if year == 2014 | year == 2015 
	sort soc female
	by soc female: egen avg_salary = mean(employee_salary) 
	duplicates drop soc female, force
	keep soc female avg_salary
	drop if soc == ""
	drop if female ==. 
	reshape wide avg_salary, i(soc) j(female)
	gen wage_gap = avg_salary0 - avg_salary1
	save "prior_wage_gap.dta", replace
	restore 
	
	merge m:1 soc using "prior_wage_gap.dta", force
	drop _m 
	
* Above-median indicators
* Stata orders missing values above every number, so an unguarded
* comparison against the median would code rows with a missing statistic
* as above-median. The if-qualifier leaves those rows missing instead, so
* they fall into neither the above- nor the below-median subsample.
* var_salary and perc_women are abbreviations of the _by_position variables.
	foreach y in var_salary wage_gap perc_women {
	egen med_`y' = median(`y') 
	gen abovemed_`y' = (`y' > med_`y') if !missing(`y')
	}

* Merge in unionization
* soc_5 is the first six characters of soc; rows found only in the union
* file are dropped, unmatched sample rows are kept with Cov missing.
	gen soc_5 = substr(soc, 1,6) 
	merge m:1 soc_5 using "union_soc_2014.dta", force 
	drop if _m == 2
	drop _m 
	egen med_coverage = median(Cov) 
	gen abovemed_unioncov = (Cov > med_coverage) if !missing(Cov)
	
* Merge in private public overlap 
	merge m:1 soc_5 using "soc4_gov_overlap.dta", force
	drop if _m == 2
	drop _m 
	egen med_overlap = median(private_overlap)
	gen abovemed_private_overlap = (private_overlap > med_overlap) if !missing(private_overlap)
	drop private_overlap
	drop med_overlap 

	compress
	save "shb_newhire_heterogeneity.dta", replace 

* Continuing employees 
	use "shb_sampledata.dta", clear
	keep if employee_earliest_year_flag == 0 & year > min_state_year 

* Merge in unionization
	gen soc_5 = substr(soc, 1,6) 
	merge m:1 soc_5 using "union_soc_2014.dta", force 
	drop if _m == 2
	drop _m 
	egen med_coverage = median(Cov) 
	* Same guard as for new hires: missing Cov must not count as above-median
	gen abovemed_unioncov = (Cov > med_coverage) if !missing(Cov)
	save "shb_continuing_heterogeneity.dta", replace 

* Table 3 regressions: treatment and female each interacted with the
* above-median union coverage indicator, run on the new-hire file first
* and the continuing-employee file second

	local rhs c.treatment##c.abovemed_unioncov c.female##c.abovemed_unioncov
	local se_opts cluster(state) keepsingletons

	foreach dta in "shb_newhire_heterogeneity.dta" "shb_continuing_heterogeneity.dta" {
		use "`dta'", clear

		foreach fe in "state year soc" "statesoc socyr" "statesoc socyr female statefemale femaleyr socfemale" {
			reghdfe log_salary_annual `rhs', absorb(`fe') `se_opts'
		}
	}

	
// Table 4: treatment x female interaction, by hire status

	local model log_salary_annual c.treatment##c.female
	local se_opts cluster(state) keepsingletons

* New hires only
	use "shb_sampledata.dta", clear
	keep if employee_earliest_year_flag == 1 & year > min_state_year

	foreach fe in "state year soc" "socyr statesoc" "socyr statesoc statefemale femaleyr stateyr socfemale" {
		reghdfe `model', absorb(`fe') `se_opts'
	}

* Continuing employees only
* The richest specification here absorbs a different set than for new hires
	use "shb_sampledata.dta", clear
	keep if employee_earliest_year_flag == 0 & year > min_state_year

	foreach fe in "state year soc" "statesoc socyr" "statesoc socyr female statefemale femaleyr socfemale" {
		reghdfe `model', absorb(`fe') `se_opts'
	}

// Table 5: new-hire regressions split by the above-median salary variance
// indicator, 0 first and then 1

	use "shb_newhire_heterogeneity.dta", clear

	local se_opts cluster(state) keepsingletons

	forvalues half = 0/1 {
		local subsample if abovemed_var_salary == `half'

		reghdfe log_salary_annual treatment female `subsample', absorb(statesoc socyr) `se_opts'
		reghdfe log_salary_annual treatment `subsample', absorb(statesoc socyr statefemale femaleyr socfemale) `se_opts'
		reghdfe log_salary_annual c.treatment##c.female `subsample', absorb(statesoc socyr) `se_opts'
		reghdfe log_salary_annual c.treatment##c.female `subsample', absorb(statesoc socyr statefemale femaleyr stateyr socfemale) `se_opts'
	}

// Table 6: new-hire regressions split first by the above-median prior wage
// gap indicator and then by the above-median share-of-women indicator

	use "shb_newhire_heterogeneity.dta", clear

	local se_opts cluster(state) keepsingletons

	foreach split in abovemed_wage_gap abovemed_perc_women {
		forvalues half = 0/1 {
			reghdfe log_salary_annual treatment female if `split' == `half', absorb(statesoc socyr statefemale femaleyr socfemale) `se_opts'
			reghdfe log_salary_annual c.treatment##c.female if `split' == `half', absorb(statesoc socyr female statefemale femaleyr stateyr socfemale) `se_opts'
		}
	}

// Table 7: departure regressions on the turnover file
// The dataset stays in memory after this section.

	use "shb_turnover.dta", clear

	local se_opts cluster(state) keepsingletons

	* Level effects of treatment and female
	foreach fe in "state year soc" "statesoc socyr" "statesoc socyr female statefemale femaleyr socfemale" {
		reghdfe departure treatment female, absorb(`fe') `se_opts'
	}

	* Treatment x female interaction; the richest set also absorbs stateyr
	foreach fe in "state year soc" "statesoc socyr" "statesoc socyr female statefemale femaleyr stateyr socfemale" {
		reghdfe departure c.treatment##c.female, absorb(`fe') `se_opts'
	}
	

//table 8
// No dataset is loaded in this section: it works on whatever is in memory,
// which in this file is shb_turnover.dta loaded by the preceding section.

* 1: Above and below median salary 
* The flag is 1 when salary is at or above the soc median. It is left
* missing when either the salary or the median is missing, because Stata
* orders missing above every number and the bare comparison would return 1.
	bysort soc: egen median_salary_by_position = median(employee_salary_annual)
	generate byte flag_gt_med_by_pos = employee_salary_annual >= median_salary_by_position if !missing(employee_salary_annual, median_salary_by_position)
	
* 2: Salary Variance by Position 
* Computed from the 2014-2015 rows of each soc and copied to all its rows.
	tempvar temp
	by soc: egen `temp' = sd(employee_salary_annual) if inrange(year, 2014, 2015)
	by soc: egen var_salary_by_position  = max(`temp')
	replace var_salary_by_position = var_salary_by_position^2
	count if missing(var_salary_by_position) == 1
	drop `temp'
	
* 3: Fraction of Women by Position
	tempvar temp
	by soc: egen `temp' = mean(female) if inrange(year, 2014, 2015) 
	by soc: egen perc_women_by_position = max(`temp')
	drop `temp'

* 4: Previous gender wage gap 
* Mean employee_salary by soc and female over 2014-2015, reshaped to one row
* per soc; wage_gap is the female==0 mean minus the female==1 mean.
	preserve
	keep if year == 2014 | year == 2015 
	sort soc female
	by soc female: egen avg_salary = mean(employee_salary) 
	duplicates drop soc female, force
	keep soc female avg_salary
	drop if soc == ""
	drop if female ==. 
	reshape wide avg_salary, i(soc) j(female)
	gen wage_gap = avg_salary0 - avg_salary1
	save "wage_gap.dta", replace
	restore 
	
	merge m:1 soc using "wage_gap.dta", force
	drop _m 
	
* Above-median indicators; rows with a missing statistic stay missing so
* they are not counted as above-median.
* var_salary and perc_women are abbreviations of the _by_position variables.
	foreach y in var_salary wage_gap perc_women {
	egen med_`y' = median(`y') 
	gen abovemed_`y' = (`y' > med_`y') if !missing(`y')
	}

	
* Merge in unionization
	gen soc_5 = substr(soc, 1,6) 
	merge m:1 soc_5 using "union_soc_2014.dta", force 
	drop if _m == 2
	drop _m 
	egen med_coverage = median(Cov) 
	gen abovemed_unioncov = (Cov > med_coverage) if !missing(Cov)

*Regressions 
* For each split variable, estimate both models on the 0 group and then
* on the 1 group.

	foreach y in flag_gt_med_by_pos abovemed_unioncov abovemed_var_salary abovemed_wage_gap abovemed_perc_women{
	forvalues x = 0/1{
	reghdfe departure treatment female if `y'== `x', absorb(statesoc socyr statefemale femaleyr socfemale) cluster(state) keepsingletons
	reghdfe departure c.treatment##c.female if  `y'== `x',  absorb(socyr statesoc statefemale femaleyr stateyr socfemale) cluster(state) keepsingletons
	}
	}


	
//table 9
// Builds a balanced state-year panel of event-time indicators, saves it as
// dynamic_indicators.dta, merges it onto the new-hire sample and runs the
// dynamic regressions.
	use "shb_sampledata.dta", clear

*dynamic indicators 
	keep state year treatment statefe 
	duplicates drop state year, force
	tsset statefe year 
	tsfill, full

	* carryforward is a user-written command: install it only when it is
	* not already available, so the script does not need network access
	* on every run
	capture which carryforward
	if _rc ssc install carryforward

	* Forward fill within state. The within-panel order is stated
	* explicitly instead of relying on the current sort state of the data.
	bysort statefe (year): carryforward treatment, replace
	bysort statefe (year): carryforward state, replace

	* Backward fill: the same operation with years in descending order
	tempvar negyear
	gen `negyear' = -year
	bysort statefe (`negyear'): carryforward treatment, replace
	bysort statefe (`negyear'): carryforward state, replace
	drop `negyear'

	* Classify states by whether treatment ever switches on or off
	isid state year, s
	by state: g temp=(treatment<treatment[_n-1] & treatment[_n-1]~=.)
	egen switchoff=max(temp), by(state)
	drop temp
	by state: g temp=(treatment>treatment[_n-1] & treatment[_n-1]~=.)
	egen switchon=max(temp), by(state)
	drop temp
	egen max_treatment=max(treatment), by(state)
	egen min_treatment=min(treatment), by(state)
	g alwaysoff=(max_treatment==0 & switchoff==0)
	g alwayson=(min_treatment==1 & switchoff==0)
	sum alwaysoff alwayson switchon switchoff
	tab switchon switchoff

* define event_year
	* event_yr is the first treated year of a state that switches on
	egen temp=min(year) if treatment==1 & switchon, by(state)
	egen event_yr=max(temp), by(state) 
	drop temp
	assert event_yr==. if switchon==0
	assert event_yr~=. if switchon==1
	g event_rel_yr=year-event_yr
	* temp is the running count of treated years within state. A year is
	* flagged as switched off when the count did not rise and was already
	* at least 1. The by-prefix keeps the first row of a state from being
	* compared with the last row of the previous state.
	bys state (year): g temp=sum(treatment)
	by state: g switchedoffyears=temp==temp[_n-1] & temp[_n-1]>=1 & temp[_n-1]~=.
	replace event_rel_yr=. if switchedoffyears==1

* generate dynamic year indicator 
	g disclosure_f0=(event_rel_yr==0)
	g disclosure_postf0 = (event_rel_yr > 0 & event_rel_yr<.)
	forv t=1(1)4 {
		g disclosure_l`t'=(event_rel_yr==-`t')
		g disclosure_prel`t'=(event_rel_yr<-`t' & event_rel_yr<.)
	}

	forv t=1(1)3 {
		g disclosure_f`t'=(event_rel_yr==`t')
		g disclosure_postf`t'=(event_rel_yr>`t' & event_rel_yr<.)
	}

	
	foreach v of varlist disclosure_* {
		replace `v'=. if switchedoffyears==1
	}

	save "dynamic_indicators.dta", replace 

*regression
	use "shb_sampledata.dta", clear

	keep if employee_earliest_year_flag == 1 & year > min_state_year 
	
	merge m:1 state year using "dynamic_indicators.dta", force
	keep if _m == 3
	drop _m 

//Panel A
	* The indicator for exactly three years before the event is left out
	* of the list
	global dynamics_treat disclosure_prel3 disclosure_l2 disclosure_l1 disclosure_f0 disclosure_postf0

	reghdfe log_salary_annual ${dynamics_treat} female, absorb(state year soc) cluster(statefe) keepsingletons
	reghdfe log_salary_annual ${dynamics_treat} female, absorb(socyr statesoc) cluster(statefe) keepsingletons
	reghdfe log_salary_annual ${dynamics_treat}, absorb(socyr statesoc statefemale femaleyr socfemale) cluster(statefe) keepsingletons

//Panel B
	global dynamics c.disclosure_prel3##c.female c.disclosure_l2##c.female c.disclosure_l1##c.female  c.disclosure_f0##c.female c.disclosure_postf0##c.female 

	reghdfe log_salary_annual ${dynamics}, absorb(state year soc) cluster(statefe) keepsingletons	
	reghdfe log_salary_annual ${dynamics}, absorb(socyr statesoc) cluster(statefe) keepsingletons
	reghdfe log_salary_annual ${dynamics}, absorb(socyr statesoc statefemale femaleyr socfemale stateyr) cluster(statefe) keepsingletons

// Table 10: new-hire regressions split by the above-median private-sector
// overlap indicator, 0 first and then 1

	use "shb_newhire_heterogeneity.dta", clear

	local se_opts cluster(state) keepsingletons

	forvalues half = 0/1 {
		local subsample if abovemed_private_overlap == `half'

		reghdfe log_salary_annual treatment female `subsample', absorb(statesoc socyr statefemale femaleyr socfemale) `se_opts'
		reghdfe log_salary_annual c.treatment##c.female `subsample', absorb(socyr statesoc statefemale femaleyr stateyr socfemale) `se_opts'
	}

// Table 11: log wage regressions on the CPS panel, years 2014 onward

	use "CPS_reform_panel.dta", clear
	drop if year < 2014

	global controls i.white i.age_bucket i.college

	local dv log_wage
	local did c.treatment##c.female
	local se_opts cluster(stfips)
	local fe_base absorb(stfips date occ2012)
	local fe_full absorb(stfips date occ2012 ind02)

// Panel A: level and interaction models, each without and then with
// controls; the models with controls also absorb ind02
	reghdfe `dv' treatment female, `fe_base' `se_opts'
	reghdfe `dv' treatment female ${controls}, `fe_full' `se_opts'
	reghdfe `dv' `did', `fe_base' `se_opts'
	reghdfe `dv' `did' ${controls}, `fe_full' `se_opts'

// Panel B: interaction model with controls on subsamples
	* public == 0 first, then public == 1
	forvalues p = 0/1 {
		reghdfe `dv' `did' ${controls} if public == `p', `fe_full' `se_opts'
	}

	* Then by abovemed_unioncov_soc and by union, 0 group before 1 group
	foreach split in abovemed_unioncov_soc union {
		forvalues half = 0/1 {
			reghdfe `dv' `did' ${controls} if `split' == `half', `fe_full' `se_opts'
		}
	}
	

// Table 12: new-hire regressions with race indicators

	use "shb_sample_race.dta", clear
	drop if year < 2014
	keep if employee_earliest_year_flag == 1 & year > min_state_year

	* white and black are 1 when the matching pct variable exceeds 95 and
	* are left missing when that pct variable is missing (.)
	generate white = (pctwhite > 95) if pctwhite != .
	generate black = (pctblack > 95) if pctblack != .

	* Keep rows flagged as either group; black is then set to 0 for rows
	* flagged white but not black
	keep if black == 1 | white == 1
	replace black = 0 if white == 1 & black != 1

	* Group identifiers for the black interactions
	egen stateblack       = group(state black)
	egen blackyr          = group(black year)
	egen socblack         = group(soc black)
	egen blackfemale      = group(black female)
	egen stateblackfemale = group(state black female)
	egen socblackfemale   = group(soc black female)
	egen blackfemaleyr    = group(black female year)

* Regressions
	local se_opts cluster(state) keepsingletons
	local levels treatment black female
	local inter c.treatment##c.black c.treatment##c.female

	reghdfe log_salary_annual `levels', absorb(statesoc socyr) `se_opts'
	reghdfe log_salary_annual `levels', absorb(statesoc socyr statefemale femaleyr socfemale) `se_opts'
	reghdfe log_salary_annual `inter', absorb(statesoc socyr) `se_opts'
	reghdfe log_salary_annual `inter', absorb(stateyr statesoc socyr statefemale femaleyr socfemale stateblack blackyr socblack) `se_opts'
	
// Table 13: new-hire regressions using employee_position fixed effects

	use "shb_sampledata.dta", clear

* New employees only
	keep if employee_earliest_year_flag == 1 & year > min_state_year

	local se_opts cluster(state) keepsingletons

	* Level effects of treatment and female
	foreach fe in "state year employee_position" "stateposition positionyr" "stateposition positionyr statefemale femaleyr positionfemale" {
		reghdfe log_salary_annual treatment female, absorb(`fe') `se_opts'
	}

	* Treatment x female interaction; the richest set also absorbs stateyr
	foreach fe in "state year employee_position" "stateposition positionyr" "stateposition positionyr statefemale femaleyr stateyr positionfemale" {
		reghdfe log_salary_annual c.treatment##c.female, absorb(`fe') `se_opts'
	}

